Plotly-based plots

I think it's a bit easier to get fully customised, publication-quality plots using Plotly than it is using Holoviews (although I find the latter much more efficient when analysing data). Note that the function definitions have been moved below the plots, so the cells in the 'Functions' section must be run before any of the plotting cells.

In [1]:
import math
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
from src import data, likelihood, plot
pio.templates.default = 'none'

Quick data preparation

In [4]:
# Date bounds forwarded to the data loaders (both left unset here) and the
# `std` value handed to data.smooth.
start = end = None
std = 7 / 2

# ZA
regions = data.za_provinces(top=4)
dfs = [
    data.za_counts(region, start=start, end=end, min_positive=1)
    for region in regions
]
events = plot.event_lines(plot.ZA_EVENTS)

# USA
# regions = data.usa_states(top=4)
# dfs = [data.usa_counts(r, start=start, end=end, min_positive=1) for r in regions]
# events = plot.event_lines([])

# Smooth every regional frame, then fit one (R, C) pair across all of them
# using the 'smoothed' column.
dfs = [data.smooth(frame, std=std) for frame in dfs]
R, C = likelihood.opt(dfs, col='smoothed')
print(f'Max. likelihood r = {R}, c = {C}')
Max. likelihood r = 476, c = 2.409933894511878
In [5]:
# Use the earliest date found in any regional frame as the common x-axis start.
start = min(df.date.min() for df in dfs)
# One figure per region, titled with the region; only the first panel
# contributes legend entries.
figs = [
    plot_infections(df, start=start, title=r, showlegend=(i == 0))
    for i, (df, r) in enumerate(zip(dfs, regions))
]
fig = subplots(figs, 'New Infections', width=800, height=1000)
# fig = subplots(figs, 'New Infections', cols=2, height=750, hspace=0.075, vspace=0.15)
fig.update_layout(margin=dict(t=80))
In [6]:
# One reproduction-number figure per region, all drawn with the shared (R, C)
# and the shared `start`; only the first panel contributes legend entries.
figs = [
    plot_reproduction(df, R, C, col='smoothed', start=start, title=r, showlegend=(i == 0))
    for i, (df, r) in enumerate(zip(dfs, regions))
]
fig = subplots(figs, 'Effective Reproduction Numbers', width=800, height=1000)
# fig = subplots(figs, 'Effective Reproduction Number', cols=2, height=750, hspace=0.075, vspace=0.15)
fig.update_layout(margin=dict(t=80))
In [ ]:
# fig.write_image('provincial.png', scale=2)

Functions

In [2]:
# Fixed y-axis range used by the reproduction-number plots.
YLIM = (0, 4)

# Colour templates: the '{}' placeholder is filled with the alpha value
# via str.format, e.g. BLUE.format(0.8).
BLUE = 'rgba(73,121,177,{})'
RED = 'rgba(192,0,0,{})'

# Ready-to-use fill colours with the alpha already baked in.
BLUEFILL = 'rgba(205,218,233,0.8)'
GREYFILL = 'rgba(192,192,192,0.2)'
BLACKFILL = 'rgba(0,0,0,0.2)'

# Two-stop colour scale running from blue (0) to red (1).
BURD = [
    (0, 'rgb(0,0,192)'),
    (1, 'rgb(192,0,0)'),
]

# Five-colour palette from ColorBrewer, each at alpha 0.2.
YLORBR = [
    f'rgba({red},{green},{blue},0.2)'
    for red, green, blue in (
        (153, 52, 4),
        (217, 95, 14),
        (254, 153, 41),
        (254, 217, 142),
        (255, 255, 212),
    )
]
In [3]:
def plot_range(x0, x1, y0, y1, name, colour, showlegend=True):
    """Return a filled, outline-free rectangle trace covering x0..x1 by y0..y1.

    The rectangle is drawn as a self-closing scatter path through its four
    corners. All traces built here share the 'range' legend group.

    Args:
        x0, x1: Left and right x coordinates of the rectangle.
        y0, y1: Bottom and top y coordinates of the rectangle.
        name: Trace name (may be None).
        colour: Fill colour string.
        showlegend: Whether the trace gets a legend entry.
    """
    corners_x = [x0, x0, x1, x1]
    corners_y = [y0, y1, y1, y0]
    return go.Scatter(
        x=corners_x,
        y=corners_y,
        name=name,
        legendgroup='range',
        showlegend=showlegend,
        mode='lines',
        line=dict(width=0),  # zero-width outline: only the fill is drawn
        fill='toself',
        fillcolor=colour,
    )

def plot_boundary(x, y0, y1, colour):
    """Return a 1px vertical line trace at `x` from y0 to y1, hidden from the legend."""
    stroke = dict(width=1, color=colour)
    return go.Scatter(
        x=[x, x],
        y=[y0, y1],
        mode='lines',
        line=stroke,
        showlegend=False,
    )
    

def plot_lockdown(x1, y0, y1, showlegend=True):
    """Return background traces marking the hard-coded 2020 lockdown phases.

    The result is four shaded bands (one per phase) followed by four thin
    vertical boundary lines, one at the start date of each phase.

    Args:
        x1: Right-hand x limit of the last ('level 3') band.
        y0, y1: Vertical extent of every band and boundary line.
        showlegend: Whether the bands get legend entries.

    Returns:
        A list of eight traces: the bands first, then the boundaries.
    """
    # (start, end, legend label, fill colour) for each phase, in date order.
    phases = [
        ('2020-03-23', '2020-03-27', 'lockdown announced', GREYFILL),
        ('2020-03-27', '2020-05-01', 'level 5', YLORBR[0]),
        ('2020-05-01', '2020-06-01', 'level 4', YLORBR[1]),
        ('2020-06-01', x1, 'level 3', YLORBR[2]),
    ]
    bands = [
        plot_range(begin, finish, y0, y1, label, fill, showlegend=showlegend)
        for begin, finish, label, fill in phases
    ]
    edges = [plot_boundary(begin, y0, y1, BLACKFILL) for begin, _, _, _ in phases]
    return bands + edges

def plot_infections(df, start=None, title='New Infections', showlegend=True):
    """Plot the raw and smoothed series of `df` over the lockdown background.

    Args:
        df: Frame with `date`, `new` and `smoothed` columns.
        start: Left x-axis limit; defaults to the earliest date in `df`.
        title: Figure title.
        showlegend: Whether the two data traces get legend entries
            (the lockdown bands are always hidden from the legend here).

    Returns:
        A go.Figure whose y-axis runs from 0 to the larger of the two series' maxima.
    """
    first = df.date.min() if start is None else start
    last = df.date.max()
    ymax = max(df.new.max(), df.smoothed.max())
    legend_opts = dict(showlegend=showlegend, legendgroup='infections')

    background = plot_lockdown(last, 0, ymax, showlegend=False)
    raw_trace = go.Scatter(
        x=df.date, y=df.new, name='raw',
        line=dict(width=2, color=BLUE.format(0.8), dash='6px'),
        **legend_opts,
    )
    smooth_trace = go.Scatter(
        x=df.date, y=df.smoothed, name='smoothed', mode='lines',
        line=dict(width=2.5, color=RED.format(1)),
        **legend_opts,
    )

    return go.Figure(
        data=[*background, raw_trace, smooth_trace],
        layout=dict(
            title=title,
            xaxis=dict(range=(first, last), showgrid=False),
            yaxis=dict(range=(0, ymax)),
            legend=dict(orientation='h', traceorder='normal'),
        ),
    )

def plot_ratio(df, stds, r, c, col='new', a=1, b=3, gamma=data.GAMMA,
               start=None, title='New Infection Ratio', showlegend=True):
    """Plot day-over-day ratios of the smoothed series for several `std` values.

    For each value in `stds` the `col` column is smoothed into a new
    'smoothed <std>' column, and the ratio of that column (plus one) to its
    previous row is plotted. A horizontal red line is drawn at exp(-gamma).

    Args:
        df: Frame with a `date` column and the `col` column; a copy is used,
            so the caller's frame is not given the extra columns.
        stds: Smoothing widths, one trace per value.
        r, c, a, b: Accepted but not used by this function.
        col: Name of the column to smooth.
        gamma: Sets the reference line height, exp(-gamma).
        start: Left x-axis limit; defaults to the earliest date in `df`.
        title: Figure title.
        showlegend: Whether the ratio traces get legend entries.

    Returns:
        A go.Figure with the y-range padded by 10% of the data span on each side.
    """
    df = df.copy()
    if start is None:
        start = df.date.min()
    last = df.date.max()

    bound = math.exp(-gamma)
    lo = hi = bound  # the reference line is always kept inside the y-range
    traces = []
    for std in stds:
        smth = f'smoothed {std}'
        label = f'σ = {std:.1f}'
        # Return value ignored: presumably data.smooth adds the `smth` column
        # to `df` in place -- TODO confirm.
        data.smooth(df, raw=col, smth=smth, std=std)
        # NOTE(review): the numerator is offset by 1 -- confirm this is intended.
        df[label] = (df[smth] + 1)/df[smth].shift()
        traces.append(go.Scatter(x=df.date, y=df[label], name=label, showlegend=showlegend))
        lo = min(lo, df[label].min())
        hi = max(hi, df[label].max())

    ylim = (lo - 0.1*(hi-lo), hi + 0.1*(hi-lo))
    reference = go.Scatter(
        x=[start, last], y=[bound, bound], showlegend=False,
        mode='lines', line=dict(width=1.5, color=RED.format(1)),
    )
    return go.Figure(
        data=traces + [reference],
        layout=dict(
            title=title,
            xaxis=dict(range=(start, last), showgrid=True),
            yaxis=dict(range=ylim),
        ),
    )

def plot_posterior(df, r, c, col='smoothed', a=1, b=3, gamma=data.GAMMA,
                   start=None, title='Posterior Infection Rate', showlegend=True):
    """Plot the observed series against the posterior and its 5-95% band.

    The frame returned by plot._posterior(df, r, c, col, a, b, gamma) supplies
    the `date`, `pos5`, `pos95`, `data` and `posterior` columns drawn here.

    Args:
        df: Input frame, passed straight to plot._posterior.
        r, c, a, b, gamma: Passed straight to plot._posterior.
        col: Column name passed to plot._posterior.
        start: Left x-axis limit; defaults to the earliest date in the
            posterior frame.
        title: Figure title.
        showlegend: Whether the 'data' and 'posterior' traces get legend entries.

    Returns:
        A go.Figure whose y-axis runs from 0 to max(pos95, data).
    """
    df = plot._posterior(df, r, c, col, a, b, gamma)
    if start is None:
        start = df.date.min()
    ymax = max(df.pos95.max(), df.data.max())
    return go.Figure(
        data = [
            *plot_lockdown(df.date.max(), 0, ymax, showlegend=False),
            # Shade the stretch between `start` and the first posterior date.
            # The band must span this figure's own y-range (0..ymax); using the
            # reproduction-number limit YLIM[1] here left it only 4 units tall.
            plot_range(start, df.date.min(), 0, ymax, None, BLUEFILL, showlegend=False),
            go.Scatter(x=df.date, y=df.pos5, showlegend=False, line_width=1, line_color=BLUE.format(0.5)),
            # 'tonexty' fills between this upper bound and the pos5 trace above.
            go.Scatter(x=df.date, y=df.pos95, showlegend=False,
                       line_width=1, line_color=BLUE.format(0.5), fillcolor=BLUEFILL, fill='tonexty'),
            go.Scatter(x=df.date, y=df.data, name='data', showlegend=showlegend,
                       line_width=2, line_color=RED.format(1)),
            go.Scatter(x=df.date, y=df.posterior, name='posterior', showlegend=showlegend,
                       mode='lines', line_width=2.5, line_color=BLUE.format(1)),
        ],
        layout = dict(
            title = title,
            xaxis = dict(range=(start, df.date.max()), showgrid=False),
            yaxis = dict(range=(0, ymax)),
            legend = dict(orientation='h', traceorder='normal')
        )
    )

def plot_reproduction(df, r, c, col='smoothed', a=1, b=3, gamma=data.GAMMA,
                      start=None, title='Effective Reproduction Number', showlegend=True):
    """Plot the reproduction-number median with its 5-95% band on a fixed y-range.

    The frame returned by plot._posterior(df, r, c, col, a, b, gamma) supplies
    the `date`, `r`, `r5` and `r95` columns drawn here. The y-axis is fixed to
    YLIM and a horizontal red line is drawn at 1.

    Args:
        df: Input frame, passed straight to plot._posterior.
        r, c, a, b, gamma: Passed straight to plot._posterior.
        col: Column name passed to plot._posterior.
        start: Left x-axis limit; defaults to the earliest date in the
            posterior frame.
        title: Figure title.
        showlegend: Whether the 'median' trace gets a legend entry.

    Returns:
        A go.Figure.
    """
    df = plot._posterior(df, r, c, col, a, b, gamma)
    first_date, last_date = df.date.min(), df.date.max()
    if start is None:
        start = first_date

    band_line = dict(width=1, color=BLUE.format(0.5))
    # Passed to the median trace unchanged; note that trace uses mode='lines'.
    marker = dict(color=df.r, colorscale=BURD, cmin=0, cmax=1.5)
    # Only rows with a non-negative `r` are drawn in the median line.
    shown = df[df.r >= 0]

    traces = [
        *plot_lockdown(last_date, *YLIM, showlegend=False),
        # Shade the stretch between `start` and the first posterior date.
        plot_range(start, first_date, 0, YLIM[1], None, BLUEFILL, showlegend=False),
        go.Scatter(x=df.date, y=df.r5, showlegend=False, line=band_line),
        # 'tonexty' fills between this upper bound and the r5 trace above.
        go.Scatter(x=df.date, y=df.r95, name='5-95th percentile', showlegend=False,
                   line=band_line, fillcolor=BLUEFILL, fill='tonexty'),
        go.Scatter(x=shown.date, y=shown.r, name='median', showlegend=showlegend,
                   mode='lines', line=dict(width=2.5, color=BLUE.format(1)), marker=marker),
        go.Scatter(x=[start, last_date], y=[1, 1], showlegend=False,
                   mode='lines', line=dict(width=1.5, color=RED.format(1))),
    ]
    return go.Figure(
        data=traces,
        layout=dict(
            title=title,
            xaxis=dict(range=(start, last_date), showgrid=False),
            yaxis=dict(range=YLIM),
            legend=dict(orientation='h', traceorder='normal'),
        ),
    )

def subplots(figs, title, rows=None, cols=1, hspace=0.05, vspace=0.05, width=1000, height=None):
    """Combine standalone figures into one grid of subplots.

    Figures fill the grid row by row. Each figure's traces are copied into its
    cell, its title becomes the subplot title, and its x/y axis settings are
    applied to the cell's axes.

    Args:
        figs: Figures to combine; each needs `layout.title.text`,
            `layout.xaxis`, `layout.yaxis` and `data`.
        title: Title of the combined figure.
        rows: Number of grid rows; defaults to just enough rows to hold
            every figure in `figs` with `cols` columns.
        cols: Number of grid columns.
        hspace: Horizontal spacing between subplots.
        vspace: Vertical spacing between subplots.
        width: Figure width.
        height: Figure height; defaults to 320 per row.

    Returns:
        The combined figure, with a horizontal legend placed just below the plots.
    """
    if rows is None:
        # Size the grid from the figures actually passed in, not from the
        # notebook-level `dfs` list (which may differ in length or not exist).
        rows = math.ceil(len(figs) / cols)
    if height is None:
        height = 320*rows
    titles = [f.layout.title.text for f in figs]
    fig = make_subplots(rows=rows, cols=cols, subplot_titles=titles,
                        horizontal_spacing=hspace, vertical_spacing=vspace)

    for i, f in enumerate(figs):
        # Row-major placement; plotly grid positions are 1-based.
        row = i // cols + 1
        col = i % cols + 1
        for d in f.data:
            fig.add_trace(d, row=row, col=col)
        fig.update_xaxes(f.layout.xaxis, row=row, col=col)
        fig.update_yaxes(f.layout.yaxis, row=row, col=col)

    return fig.update_layout(
        title_text = title,
        legend = dict(x=0, y=-0.02, orientation='h', traceorder='normal'),
        width = width,
        height = height
    )

Article plots

In [ ]:
# Load counts with no region argument, smooth them, and fit (R, C) to this
# single frame. This rebinds the notebook-level R and C.
df = data.smooth(
    data.za_counts(start=start, end=end, min_positive=1),
    std=std,
)
R, C = likelihood.opt(df, col='smoothed')
print(f'Max. likelihood r = {R}, c = {C}')
In [ ]:
# Ratio plot for three smoothing widths.
fig = plot_ratio(df, stds=(7/4, 7/3, 7/2), r=R, c=C)
# Date ticks every 28 days (dtick is given in milliseconds), anchored at 15 March 2020.
fig.update_xaxes(
    tick0=pd.Timestamp(2020, 3, 15),
    dtick=28*24*60*60*1000,
    tickformat='%d %b',
)
fig.update_yaxes(range=(0.75, 1.5), tick0=1, dtick=0.2)
fig.update_layout(
    legend=dict(orientation='h'),
    width=480,
    height=240,
    margin=dict(t=40, r=0, b=0, l=20),
)
In [ ]:
fig = plot_infections(df)
# Date ticks every 28 days (dtick is given in milliseconds), anchored at 15 March 2020.
fig.update_xaxes(
    tick0=pd.Timestamp(2020, 3, 15),
    dtick=28*24*60*60*1000,
    tickformat='%d %b',
)
# The y-range starts slightly below zero and is capped at the raw maximum.
fig.update_yaxes(range=(-150, df.new.max()), tick0=0, dtick=1000)
fig.update_layout(
    width=480,
    height=240,
    margin=dict(t=40, r=0, b=0, l=32),
)
In [ ]:
fig = plot_posterior(df, r=R, c=C)
# Date ticks every 28 days (dtick is given in milliseconds), anchored at 15 March 2020.
fig.update_xaxes(
    tick0=pd.Timestamp(2020, 3, 15),
    dtick=28*24*60*60*1000,
    tickformat='%d %b',
)
fig.update_layout(
    width=480,
    height=240,
    margin=dict(t=40, r=0, b=0, l=32),
)
In [ ]:
fig = plot_reproduction(df, r=R, c=C)
# Date ticks every 28 days (dtick is given in milliseconds), anchored at 15 March 2020.
fig.update_xaxes(
    tick0=pd.Timestamp(2020, 3, 15),
    dtick=28*24*60*60*1000,
    tickformat='%d %b',
)
fig.update_layout(
    width=480,
    height=240,
    margin=dict(t=40, r=0, b=0, l=8),
)
In [ ]:
# This cell reads `dfs`, `regions`, `R` and `C`, so make sure you (re-)run the
# 'data preparation' cell for this one.
figs = [
    plot_reproduction(df, R, C, col='smoothed', start='2020-03-05', title=r, showlegend=(i == 0))
    for i, (df, r) in enumerate(zip(dfs, regions))
]
fig = subplots(
    figs,
    'Effective Reproduction Number',
    width=480,
    height=720,
    vspace=0.06,
)
fig.update_layout(margin=dict(t=64, r=0, b=0, l=8))
In [ ]:
# fig.write_image('provincial.svg')
In [ ]: